package utils;

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Static helpers for downloading a web page as text and for cutting a tagged
 * fragment out of that text and saving it to a file.
 */
public class HTML {

	/**
	 * Downloads the resource at the given address with an HTTP GET and returns
	 * its body as text, with every line terminated by {@code '\n'}.
	 *
	 * <p>Failures are not propagated: the stack trace is printed and whatever
	 * was read up to that point (possibly the empty string) is returned.
	 * Progress messages are written to standard output.
	 *
	 * @param URL the address to fetch
	 * @return the text read from the response; never {@code null}
	 */
	public static String getHTML(String URL) {

		// StringBuilder avoids the quadratic cost of repeated String concatenation.
		StringBuilder result = new StringBuilder();
		HttpURLConnection conn = null;
		System.out.print("Fetching connection... ");
		try {
			conn = (HttpURLConnection) new URL(URL).openConnection();
			conn.setRequestMethod("GET");
			// Decode as UTF-8 explicitly (the same charset extractAndSaveXML writes)
			// instead of relying on the platform default; try-with-resources closes
			// the reader even when reading fails part-way through.
			try (BufferedReader rd = new BufferedReader(
					new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
				String line;
				while ((line = rd.readLine()) != null) {
					result.append(line).append('\n');
				}
			}
			System.out.println("done!");
		} catch (Exception e) {
			// Best effort: report the problem and return what was read so far.
			System.out.println("failed!");
			e.printStackTrace();
		} finally {
			if (conn != null) {
				conn.disconnect();
			}
		}
		return result.toString();
	}

	/**
	 * Extracts the fragment that starts at the first occurrence of
	 * {@code initTag} and ends with the first occurrence of {@code endTag} at
	 * or after that position (both tags included), writes it to
	 * {@code fileName} encoded as UTF-8, and returns it.
	 *
	 * <p>Errors while writing the file are not propagated: the stack trace is
	 * printed and the extracted fragment is still returned.
	 *
	 * @param html     the text to search
	 * @param initTag  the text marking the start of the fragment
	 * @param endTag   the text marking the end of the fragment
	 * @param fileName path of the file to (over)write
	 * @return the extracted fragment, from {@code initTag} through {@code endTag}
	 * @throws StringIndexOutOfBoundsException if {@code initTag} is not found,
	 *         or {@code endTag} is not found at or after it
	 */
	public static String extractAndSaveXML(String html, String initTag, String endTag, String fileName) {

		int beginIndex = html.indexOf(initTag);
		if (beginIndex < 0) {
			throw new StringIndexOutOfBoundsException("Start tag not found: " + initTag);
		}
		// Search from the start tag onwards so an earlier stray end tag is ignored.
		int endIndex = html.indexOf(endTag, beginIndex);
		if (endIndex < 0) {
			throw new StringIndexOutOfBoundsException("End tag not found after start tag: " + endTag);
		}
		// Include the end tag itself, using its real length rather than a fixed one.
		String fragment = html.substring(beginIndex, endIndex + endTag.length());

		try (Writer w = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8")) {
			w.write(fragment);
		} catch (Exception e) {
			// Best effort: saving failed, but the caller still gets the fragment.
			e.printStackTrace();
		}
		return fragment;
	}
}
